* ============================================================================ *
* PROJECT:		Is Incumbency Advantage Gendered?
* AUTHOR: 		Semra Sevi 
* DATE:			2022-01-25
* ============================================================================ *

***************************************
* Preparing the data and descriptives * 
***************************************

* Install (or refresh) the user-written packages used by this project.
* Each call downloads from GitHub and overwrites any installed copy, so this
* section needs network access on every run.
net install palettes, replace from("https://raw.githubusercontent.com/benjann/palettes/master/")
net install colrspace, replace from("https://raw.githubusercontent.com/benjann/colrspace/master/")
net install rdrobust, from("https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata") replace
net install rddensity, from("https://raw.githubusercontent.com/rdpackages/rddensity/master/stata") replace
net install rdmulti, from("https://raw.githubusercontent.com/rdpackages/rdmulti/master/stata") replace

* Load the candidate-level data. The clear option lets the do-file be re-run
* in the same session: without it, use stops with r(4) as soon as the data
* in memory have been modified (which the rest of this file always does).
use "federal-candidates-2021-12-14.dta", clear

* Average number of seats per Canadian federal general election, 1921 onward.
* The cross-tab shows the number of winners (seats) at each election date;
* the figures below are typed in by hand from that table.
tabulate edate elected if type_elxn == 1 & year >= 1921
display 234 + 245 + 244 + 243 + 244 + 245 + 245 + 262 + 264 + 264 + 265 + ///
	264 + 263 + 264 + 263 + 264 + 263 + 281 + 281 + 282 + 295 + 295 + ///
	301 + 301 + 308 + 308 + 308 + 308 + 338 + 338 + 338
* 31 elections in total
display 8618/31

* Average number of seats per election from 1990 onward (10 elections)
display 295 + 301 + 301 + 308 + 308 + 308 + 308 + 338 + 338 + 338
display 3143/10

* Number of general elections held after 1960
codebook year elected if type_elxn == 1 & year > 1960
tabulate year elected if type_elxn == 1 & year > 1960

* Seats (ridings) at each election after 1960, summed over 20 elections
display 264 + 263 + 264 + 263 + 264 + 263 + 281 + 281 + 282 + 295 + ///
	295 + 301 + 301 + 308 + 308 + 308 + 308 + 338 + 338 + 338
* Average number of seats per election after 1960
display 5863/20

* Convert the string election date (year-month-day) into a Stata daily date
generate edate2 = date(edate, "YMD")
format %td edate2

* Margin of victory (the running variable).
* Rank candidates within each contest by vote share, highest share first;
* ranking the negated share makes the top vote-getter rank 1.
generate negresult = -percent_votes
by edate province riding, sort: egen candidate_rank = rank(negresult)
sort edate province riding candidate_rank
by edate province riding: generate winner_result = percent_votes[1]
by edate province riding: generate second_result = percent_votes[2]

* Rank-1 candidates are measured against the runner-up (positive margin);
* everyone else is measured against the top vote share (negative margin).
generate margin_victory = percent_votes - cond(candidate_rank == 1, second_result, winner_result)

* Extended missing codes record why a margin is unavailable:
*   .a  acclaimed candidates (no contest, hence no margin)
*   .b  running variable exactly at the cut-off (only about 10 cases, all
*       in early elections)
*   .c  multiple candidacies
replace margin_victory = .a if acclaimed == 1
replace margin_victory = .b if margin_victory == 0
replace margin_victory = .c if multiple_candidacy == 1

order id edate province riding percent_votes candidate_rank elected ///
	winner_result second_result margin_victory type_elxn acclaimed

* Five-point bins labelled by the edge furthest from zero: negative margins
* keep the lower edge, non-negative margins are moved to the upper edge.
generate margin_bins = 5*floor(margin_victory/5)
replace margin_bins = margin_bins + 5 if margin_bins >= 0
	
* Outcome variables measured at the candidate's next record

sort id edate

* Result at the next record, kept only when that record falls in the
* immediately following parliament. The win indicator is rescaled by 100.
by id: generate elected_next = elected[_n + 1] if parliament[_n + 1] == parliament + 1
replace elected_next = 100*elected_next
by id: generate percent_votes_next = percent_votes[_n + 1] if parliament[_n + 1] == parliament + 1

* Runs again (0/100): the next record is either a type_elxn == 1 contest in
* the following parliament or a type_elxn == 0 contest in the same one.
* Records from parliament 44 stay missing.
by id: generate runs_again = 100 if ///
	(parliament[_n + 1] == parliament + 1 & type_elxn[_n + 1] == 1) | ///
	(parliament[_n + 1] == parliament     & type_elxn[_n + 1] == 0)
replace runs_again = 0 if runs_again == . & parliament != 44

* Runs again at any later point (0/100): any record other than the
* candidate's last one. Parliament 44 is again left missing.
by id: generate runs_again_anytime = 100 if _n < _N
replace runs_again_anytime = 0 if runs_again_anytime == . & parliament != 44

order id edate parliament percent_votes runs_again runs_again_anytime type_elxn

* Numeric province identifier. Quebec appears under two spellings, which
* encode maps to codes 11 and 12; fold the second into the first.
* NOTE(review): the codes 11 and 12 depend on the alphabetical order of the
* province strings in this data release; re-check if the data change.
encode province, generate(prov)
replace prov = 11 if prov == 12

* Artificial cut-offs used to test for significance: candidates with
* gender == 1 get their margin shifted by 500, so that group has its own
* cut-off at 500 while the other group stays at 0.
generate artificial_cutoff = 500*(gender == 1)
generate artificial_margin = margin_victory + artificial_cutoff
order gender margin_victory artificial_margin artificial_cutoff

* Option variables for rdmc, filled in the first two rows only
* (presumably one row per cut-off; confirm against the rdmc documentation)
generate pv = 1 in 1/2
generate kv = "triangular" in 1/2
generate bwsv = "mserd" in 1/2
generate vcev = "nncluster year" in 1/2

summarize runs_again elected percent_votes

* Variables for the covariate-balance checks in the appendix

* Number of women (gender == 1) contesting each riding at each election:
* running count within the contest, then spread the final count to all rows
sort edate province riding gender
by edate province riding: generate num_women = sum(gender == 1)
by edate province riding: replace num_women = num_women[_N]

* Number of candidates in each riding at each election
sort edate province riding
by edate province riding: generate num_candidates_new = _N

* Electoral experience at t: how many times the candidate has already
* contested an election before the current record
sort id edate
by id: generate electoral_exp = _n - 1

* Indicator for the candidate's first recorded election
sort id edate
by id: generate first_elxn = (_n == 1)

* Ever running again by gender, among candidates contesting their first
* recorded election after 1990
table runs_again_anytime gender if first_elxn == 1 & year > 1990

* Same outcome restricted to first-time candidates who lost, women then men.
* Every other statement in this file codes women as gender == 1 and men as
* gender == 0, so the second tabulation selects 0 (the earlier gender == 2
* matches no men under that coding).
* NOTE(review): confirm the 0/1 coding against the data codebook.
tab runs_again_anytime if first_elxn == 1 & gender == 1 & year > 1990 & elected == 0
tab runs_again_anytime if first_elxn == 1 & gender == 0 & year > 1990 & elected == 0

* Political experience at t: number of elections the candidate has won
* before the current record (running total of wins minus the current one)
sort id edate
by id: generate political_exp = sum(elected) - elected

* Major party indicator: Conservative or Liberal
generate major_party = inlist(party_minor_group, "Conservative", "Liberal")

* Analysis sample: keep type_elxn == 1 contests only, and drop party
* switchers and acclaimed candidates
generate clean_case = !switcher & !acclaimed & type_elxn == 1
keep if clean_case

* Numeric party identifier, plus a party-by-year identifier, for use as
* fixed effects in the models
encode party_major_group, generate(party_major_group_new)
*xi i.party_major_group_new*i.year
tostring year, generate(str_year)
generate _party_year_fe = party_major_group + str_year
encode _party_year_fe, generate(party_year_fe)

* One dummy per year and per province
tabulate year, generate(d_year)
tabulate prov, generate(d_prov)

* Cluster identifiers: province-riding, riding, and province-riding-party.
* NOTE(review): prov_riding joins the two strings with no separator, unlike
* province_riding_party below; kept as is so the existing codes do not move.
generate prov_riding = province + riding
encode prov_riding, generate(prov_riding_num)
encode riding, generate(riding_num)
generate province_riding_party = province + " " + riding + " " + party_major_group
encode province_riding_party, generate(prov_riding_party)
	
* Average number of distinct parties per riding-election from 1990 onward.
* First collapse to one row per contest and party, then count those rows
* within each contest and average the counts.
preserve
contract edate province riding party_minor_group if year >= 1990
keep edate province riding party_minor_group
contract edate province riding
mean _freq
restore


* Figure 1. Percentage of women candidates and wins over time
preserve
* Women indicator (1 = woman, 0 = man; any other gender value stays missing)
generate female_dummy = gender if inlist(gender, 0, 1)
* Same indicator defined for winners only, so its yearly mean is the share
* of women among elected candidates
generate elected_f = female_dummy if elected == 1
collapse (mean) female_dummy elected_f if type_elxn == 1 & year >= 1921, by(year)
list year elected_f
list year female_dummy
* Proportions to percentages
foreach share of varlist elected_f female_dummy {
	replace `share' = `share'*100
}
twoway (line elected_f year, lcolor(erose) lwidth(medthick)) ///
	(line female_dummy year, lwidth(medthick)), ///
	xlabel(1921(20)2021) xscale(r(1921 2021))  ytitle("Percentage of women") ///
	xtitle("Year") title("") legend(order(2 "Candidates" 1 "Incumbents" )) ///
	saving("women elected candidates.gph", replace)
restore

* Figure 2. Percentage of women and men candidates who belong to the
* Liberal or Conservative Party of Canada
preserve
* Yearly share of major-party candidates within each gender, as a percentage
collapse (mean) major_party if year >= 1921, by(gender year)
replace major_party = 100*major_party
label variable year  "Election"

* Scatter plus lowess smoother per gender: women (gender == 1) in salmon,
* men (gender == 0) in teal; the legend shows the two smoothers only
twoway (scatter major_party year if gender == 1, yaxis(1) symbol(smcircle) mcolor("250 128 114")) ///
	(lowess major_party year if gender == 1 , yaxis(1) symbol(smcircle) lcolor("250 128 114") lwidth(medthick)) ///
	(scatter major_party year if gender == 0, yaxis(1) symbol(smcircle) mcolor("32 178 170")) ///
	(lowess major_party year if gender == 0 , yaxis(1) symbol(smcircle) lcolor("32 178 170") lwidth(medthick)),  ///
	legend(order(2 "Women" 4 "Men"))  ///
	xlabel(1921(20)2021) ytitle("Percentage of candidates", axis(1))
restore

	


* Incumbents are candidates with a positive margin of victory.
* Stata treats every missing value (., .a, .b, .c) as larger than any
* number, so a bare margin_victory > 0 would also select candidates whose
* margin was set to missing (ties, multiple candidacies, missing vote
* shares). Each condition therefore also requires a non-missing margin.

* probability that an incumbent will be a candidate in the next election since 1921
tab runs_again if margin_victory > 0 & !missing(margin_victory) & year >= 1921
* probability that an incumbent will be a candidate in the next election since 1990
tab runs_again if margin_victory > 0 & !missing(margin_victory) & year >= 1990
* percentage of women incumbents who rerun
tab runs_again if gender == 1 & margin_victory > 0 & !missing(margin_victory) & year >= 1990
* percentage of men incumbents who rerun
tab runs_again if gender == 0 & margin_victory > 0 & !missing(margin_victory) & year >= 1990
* among the incumbents who rerun, the proportion winning the next election since 1921
tab elected_next if margin_victory > 0 & !missing(margin_victory) & year >= 1921
* among the incumbents who rerun, the proportion winning the next election since 1990
tab elected_next if margin_victory > 0 & !missing(margin_victory) & year >= 1990
* probability of incumbent men winning conditional on running for a given election
tab elected_next if gender == 0 & margin_victory > 0 & !missing(margin_victory) & year >= 1990
* probability of incumbent women winning conditional on running for a given election
tab elected_next if gender == 1 & margin_victory > 0 & !missing(margin_victory) & year >= 1990

* Among men and women contesting their first recorded election (1990 on),
* the percentage who ever run again: all first runners, then losers only
tabulate runs_again_anytime gender if first_elxn == 1 & year >= 1990
tabulate runs_again_anytime gender if first_elxn == 1 & year >= 1990 & elected == 0
* Ratios typed in by hand from the tables above
display 1509/6723
display 562/2633
* Same cross-tab for every losing candidate since 1990, first-timer or not
tabulate runs_again_anytime gender if year >= 1990 & elected == 0

* Flag candidates whose margin was exactly 0 (ties), 1921 onward; the
* author's note records 8 such people.
* Exact zeros were recoded to the extended missing value .b when
* margin_victory was built, so the flag must test for .b: comparing with 0
* at this point is never true and would leave the flag at 0 for everyone.
gen check_margin = margin_victory == .b if year >= 1921
count if check_margin == 1
		

* More descriptives, 1921 onward.
* Winners (positive margin): rerun rate and win rate at the next election.
* Missing margins compare as larger than any number in Stata, so they are
* excluded explicitly; otherwise ties and multiple candidacies would be
* counted as winners.
su runs_again if margin_victory > 0 & !missing(margin_victory) & year >= 1921
su elected_next if margin_victory > 0 & !missing(margin_victory) & year >= 1921
* Losers (negative margin): vote share at the next election. A missing
* margin can never satisfy < 0, so no extra guard is needed here.
su percent_votes_next if margin_victory < 0 & year >= 1921
	
* Elected Members of Parliament: years covered and win counts.
* The relational operator is written as the single token >= (not split as
* "> =") and the win tabulation uses the same >= 1990 window as the
* codebook call beside it. No retained election falls in 1990 itself, so
* the selected rows should be unchanged.
codebook year if year >= 1990
tab elected if year >= 1990
codebook year if year >= 1921

	
* Share of winners who win again at the next election, by year.
* Both steps select winners with elected == 1. A bare "if elected" is also
* true when elected is missing, which would make the table disagree with
* the collapsed series plotted below.
tab elected_next year if elected == 1
preserve
collapse elected_next if elected == 1, by(year)
scatter elected_next year
restore

	
	
